# Import required R libraries
library(fpp3)
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
For naive forecasts, we simply set all forecasts to be the value of the last observation – Because a naive forecast is optimal when data follow a random walk
we set each forecast to be equal to the last observed value from the same season of the year
allow the forecasts to increase or decrease over time, where the amount of change over time (called the drift) is set to be the average change seen in the historical data.
# 1960-2017 (57 years total)
aus_pop <- global_economy %>%
filter(Country == "Australia") %>%
mutate(Population = Population/1e6) %>%
select(c(Country, Code, Year, Population))
# Set training data from 1960 to 2002 (43 years)
train <- aus_pop %>%
filter_index("1960" ~ "2002")
# Fit the models
pop_fit <- train %>%
model(
Naive = NAIVE(Population),
`Seasonal naive` = SNAIVE(Population),
`Random walk` = RW(Population ~ drift())
)
# Generate forecasts for 14 years
pop_fc <- pop_fit %>% forecast(h = "14 years")
# Plot forecasts against actual values
pop_fc %>%
autoplot(train, level = NULL) +
autolayer(
filter_index(aus_pop, "2003" ~ "2017"),
colour = "black"
) +
labs(
y = "Population (in millions)",
title = "Forecasts for annual population in Australia"
) +
guides(colour = guide_legend(title = "Forecast"))
remove quarters at the tail of the aus_production tsibble without any data for Bricks
# 1956 Q1 to 2005 Q2 (198 quarters)
aus_bricks <- aus_production %>%
select(c(Quarter, Bricks)) %>% na.omit(aus_bricks)
# Set training data from 1992 to 2006
train <- aus_bricks %>%
filter_index("1956 Q1" ~ "1993 Q4")
# Fit the models
brick_fit <- train %>%
model(
`Naive` = NAIVE(Bricks),
`Seasonal naive` = SNAIVE(Bricks),
`Random walk` = RW(Bricks ~ drift())
)
# Generate forecasts for 14 quarters
brick_fc <- brick_fit %>% forecast(h = 46)
# Plot forecasts against actual values
brick_fc %>%
autoplot(train, level = NULL) +
autolayer(
filter_index(aus_bricks, "1994 Q1" ~ .),
colour = "black"
) +
labs(
y = "Millions",
title = "Forecasts for quarterly brick production"
) +
guides(colour = guide_legend(title = "Forecast"))
# 1972 JUL to 2018 DEC (558 months)
nsw_lambs <- aus_livestock %>%
filter(State == 'New South Wales' &
Animal == 'Lambs') %>%
mutate(Count = Count/1e3) %>%
select(c(Month, Count))
# Set training data from 1972 through 2006
train <- nsw_lambs %>%
filter_index("1972 JUL" ~ "2006 DEC")
# Fit the models
lamb_fit <- train %>%
model(
`Naive` = NAIVE(Count),
`Seasonal naive` = SNAIVE(Count),
`Random walk` = RW(Count ~ drift())
)
# Generate forecasts for 144 months
lamb_fc <- lamb_fit %>% forecast(h = 144)
# Plot forecasts against actual values
lamb_fc %>%
autoplot(train, level = NULL) +
autolayer(
filter_index(nsw_lambs, "2007 JAN" ~ .),
colour = "black"
) +
labs(
y = "Thousands",
title = "Forecasts for lambs slaughtered in NSW"
) +
guides(colour = guide_legend(title = "Forecast"))
# Wealth as a percentage of net disposable income.
hh_wealth <- hh_budget %>%
select(c(Country, Year, Wealth))
# 1995- 2016 (22 years)
# Set training data from 1995 through 2010
train <- hh_wealth %>%
filter_index("1995" ~ "2010")
# Fit the models
hh_w_fit <- train %>%
model(
`Naive` = NAIVE(Wealth),
`Seasonal naive` = SNAIVE(Wealth),
`Random walk` = RW(Wealth ~ drift())
)
# Generate forecasts for 6 years
hh_w_fc <- hh_w_fit %>% forecast(h = 6)
# Plot forecasts against actual values
hh_w_fc %>%
autoplot(train, level = NULL) +
autolayer(
filter_index(hh_wealth, "2011" ~ .),
colour = "black"
) +
labs(
y = "Percentage",
title = "Forecasts for wealth as percentage of net disposable income"
) +
guides(colour = guide_legend(title = "Forecast"))
# 8 states
# 1982 Apr - 2018 Dec (36 years)
aus_ta_to <- aus_retail %>%
filter(Industry == "Takeaway food services") %>%
select(c(State, Month, Turnover))
# Set training data from 1982 Apr through 2008 Dec
train <- aus_ta_to %>%
filter_index("1982 Apr" ~ "2008 DEC")
# Fit the models
austato_fit <- train %>%
model(
`Naive` = NAIVE(Turnover),
`Seasonal naive` = SNAIVE(Turnover),
`Random walk` = RW(Turnover ~ drift())
)
# Generate forecasts for 120 months
austato_fc <- austato_fit %>% forecast(h = 120)
# Plot forecasts against actual values
austato_fc %>%
autoplot(train, level = NULL) +
autolayer(
filter_index(aus_ta_to, "2009 JAN" ~ .),
colour = "black"
) +
labs(
y = "$Million AUD",
title = "Retail turnover in Australian takeaway food"
) +
guides(colour = guide_legend(title = "Forecast"))
Use the Facebook stock price (data set gafa_stock) to do the following:
Produce a time plot of the series.
# Tail shows the last day is 2018-12-31
fb_stock <- gafa_stock %>%
filter(Symbol == 'FB')
fb_stock %>% autoplot(Close) +
labs(
y = "Price (in USD)",
title = "Closing Stock Price of Facebook"
)
Produce forecasts using the drift method and plot them.
# Re-index based on trading days
fb_stock <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
# Fit the models
fb_fit <- fb_stock %>%
model(
`Naive Drift` = NAIVE(Close ~ drift()),
`Random Walk` = RW(Close ~ drift())
)
# Produce forecasts for next 253 days (~1 year)
fb_fc <- fb_fit %>% forecast(h = 253)
# Plot the forecasts
fb_fc %>%
autoplot(fb_stock, level = NULL) +
autolayer(fb_stock, Close, colour = "black") +
labs(y = "$US",
title = "Facebook daily closing stock prices"
) +
guides(colour = guide_legend(title = "Forecast"))
Naive Drift and Random Walk have the same line.
Show that the forecasts are identical to extending the line drawn between the first and last observations.
fb_fc %>%
autoplot(fb_stock, level = NULL) +
autolayer(fb_stock, Close, colour = "black") +
labs(y = "$US",
title = "Facebook daily closing stock prices",
) +
guides(colour = guide_legend(title = "Forecast")) +
geom_segment(aes(x=first(fb_stock$day), y=first(fb_stock$Close),
xend=last(fb_stock$day), yend=last(fb_stock$Close)),
linetype='dashed')
Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
fb_fit_2 <- fb_stock %>%
model(
Mean = MEAN(Close),
Naive = NAIVE(Close),
`Seasonal naive` = SNAIVE(Close),
`Random Walk` = RW(Close)
)
# Produce forecasts for next 253 days (~1 year)
fb_fc_2 <- fb_fit_2 %>% forecast(h = 253)
# Plot the forecasts
fb_fc_2 %>%
autoplot(fb_stock, level = NULL) +
autolayer(fb_stock, Close, colour = "black") +
labs(y = "$US",
title = "Facebook daily closing stock prices",
subtitle = "SUBTITLE HERE") +
guides(colour = guide_legend(title = "Forecast"))
Apply a seasonal naive method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# Extract data of interest
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()
# Look at some forecasts
fit %>% forecast() %>% autoplot(recent_production)
What do you conclude?
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
# Extract data of interest
# 1960-2017 (57 years total)
aus_exports <- global_economy %>%
filter(Country == 'Australia')
# Define and estimate a model
fit <- aus_exports %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()
# Look at some forecasts
fit %>% forecast() %>% autoplot(aus_exports)
# 1956 Q1 to 2005 Q2 (198 quarters)
aus_bricks <- aus_production %>%
select(c(Quarter, Bricks)) %>% na.omit(aus_bricks)
# Define and estimate a model
fit <- aus_bricks %>% model(SNAIVE(Bricks))
# Look at the residuals
fit %>% gg_tsresiduals()
# Look at some forecasts
fit %>% forecast() %>% autoplot(aus_bricks)
For your retail time series (from Exercise 8 in Section 2.10):
set.seed(8675309)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
Create a training dataset consisting of observations before 2011 using
myseries_train <- myseries %>%
filter(year(Month) < 2011)
Check that your data have been split appropriately by producing the following plot.
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).
fit <- myseries_train %>%
model(SNAIVE(Turnover ~ drift()))
Check the residuals.
fit %>% gg_tsresiduals()
Do the residuals appear to be uncorrelated and normally distributed?
Produce forecasts for the test data
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
fc %>% autoplot(myseries)
Compare the accuracy of your forecasts against the actual values.
fit %>% accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Queen… Takeaway … SNAIVE… Trai… -6.28e-15 13.2 9.69 -0.972 7.65 0.840 0.882
## # … with 1 more variable: ACF1 <dbl>
fc %>% accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(… Quee… Takeaway… Test 39.8 44.9 40.2 13.0 13.1 3.49 2.99 0.761
How sensitive are the accuracy measures to the amount of training data used?